# Read in data
stop_frisk_df =
# Read in data from internet
GET("https://www1.nyc.gov/assets/nypd/downloads/excel/analysis_and_planning/stop-question-frisk/sqf-2016.csv") %>%
content("parsed") %>%
# Clean and fix names of columns
janitor::clean_names() %>%
rename(
precinct = pct,
date_stop = datestop,
time_stop = timestop,
stop_in_out = inout,
obs_time_min = perobs,
stop_time_min = perstop,
arst_made = arstmade,
off_in_unif = offunif,
hair_col = haircolr,
eye_col = eyecolor,
other_feature = othfeatr,
boro = city
) %>%
mutate(
# Combine height columns
height_inch = ht_feet * 12 + ht_inch,
# Convert date to proper format
date_stop = mdy(date_stop),
# Convert time to proper format
time_stop = hm(time_stop / 100),
# Recode to be more informative
stop_in_out = recode(stop_in_out, "I" = "inside", "O" = "outside"),
race = recode(
race,
"A" = "other",
"B" = "black",
"I" = "other",
"P" = "black-hispanic",
"Q" = "white-hispanic",
"W" = "white",
"U" = "other",
"Z" = "other"
),
hair_col = recode(
hair_col,
"BA" = "bald",
"BK" = "black",
"BL" = "blond",
"BR" = "brown",
"DY" = "other",
"FR" = "other",
"GY" = "other",
"RA" = "other",
"SN" = "other",
"SP" = "other",
"WH" = "other",
"XX" = "other",
"ZZ" = "other",
),
eye_col = recode(
eye_col,
"BK" = "black",
"BL" = "blue",
"BR" = "brown",
"DF" = "other",
"GR" = "other",
"GY" = "other",
"HA" = "other",
"MA" = "other",
"PK" = "other",
"VI" = "other",
"XX" = "other",
"Z" = "other",
),
build = recode(
build,
"H" = "heavy",
"M" = "medium",
"T" = "thin",
"U" = "other",
"Z" = "unknown"
),
# change boro columns to lowercase for consistency
boro = tolower(boro),
# change character datatypes to numeric
age = as.numeric(age),
obs_time_min = as.numeric(obs_time_min),
stop_time_min = as.numeric(stop_time_min)
) %>%
# select columns for further analysis
select(precinct, date_stop, time_stop, stop_in_out, obs_time_min, stop_time_min, arst_made, off_in_unif, frisked,
searched, rf_vcrim, rf_othsw, rf_attir:ac_evasv, cs_furtv:cs_other, rf_knowl, sb_hdobj:sb_admis, rf_furt,
rf_bulg, sex, race, age, height_inch, weight:build, boro, xcoord, ycoord) %>%
# change all columns that have Y/N to 1/0
mutate_at(vars(arst_made:rf_bulg), funs(recode(., "Y" = "1", "N" = "0"))) %>%
# change binary columns to numeric instead of character
mutate_at(vars(arst_made:rf_bulg), funs(as.numeric(.))) %>%
# converts all character variables to factors (this does the same as the for loop)
mutate_if(is.character, as.factor) %>%
# remove the single row of NAs
filter(!is.na(build))
Converting X Y Coordinates
test_df=
stop_frisk_df %>%
select(xcoord, ycoord) %>%
drop_na()
coordinates(test_df) <- c("xcoord","ycoord")
proj4string(test_df) <- CRS("+init=epsg:2263")
CRS.new <- CRS("+init=epsg:4326")
test_df_new <- spTransform(test_df, CRS.new)
test_df_new <- data.frame(longitude = coordinates(test_df_new)[,1], latitude = coordinates(test_df_new)[,2])
Adding Precinct Data & Converting X/Y Coords
nypd_prec =
st_read("./data/nypp/geo_export_55605c77-3922-4b7a-bd65-18d11225a91f.shp")
## Reading layer `geo_export_55605c77-3922-4b7a-bd65-18d11225a91f' from data source `/Users/laurenfranks/Documents/Columbia/Data Science/final project/group_13_final_proj/data/nypp/geo_export_55605c77-3922-4b7a-bd65-18d11225a91f.shp' using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## epsg (SRID): 4326
## proj4string: +proj=longlat +ellps=WGS84 +no_defs
long_lat_df =
stop_frisk_df %>%
select(xcoord, ycoord) %>%
drop_na()
coordinates(long_lat_df) <- c("xcoord", "ycoord")
proj4string(long_lat_df) <- CRS("+init=epsg:2263")
long_lat_df <- spTransform(long_lat_df, CRS("+init=epsg:4326"))
long_lat_df <- data.frame(longitude = coordinates(long_lat_df)[,1], latitude = coordinates(long_lat_df)[,2])
stop_frisk_map_df =
stop_frisk_df %>%
drop_na(xcoord) %>%
bind_cols(long_lat_df) %>%
select(-xcoord, -ycoord) %>%
mutate(
sex = recode(sex, "M" = "male", "F" = "female"),
label = str_c("Gender: ", sex, "<br/>", "Race: ", race, "<br/>", "Age: ", age, "<br/>", "Build: ", build) %>% map(htmltools::HTML))
Creating Maps
Basic Location Maps
# Locations of Stops
stop_frisk_map_df %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
label = ~label)
# Locations of Frisks
stop_frisk_map_df %>%
filter(frisked==1) %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
label = ~label)
# Locations of Stops with Precincts Outlines
stop_frisk_map_df %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = nypd_prec,
color = "#000000",
fillColor = "#FFFFFF",
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.1) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
label = ~label)
# Locations of Frisks with Precincts Outlines
stop_frisk_map_df %>%
filter(frisked==1) %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = nypd_prec,
color = "#000000",
fillColor = "#FFFFFF",
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.1) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
label = ~label)
Amount of Stops/Frisks/Arrests by Precinct
# Colored Area based on Number of Stops with Information on # of Frisks by Precinct
stops_by_prec =
stop_frisk_df %>%
group_by(precinct) %>%
summarize(totalstops = n()) %>%
drop_na()
frisks_by_prec =
stop_frisk_df %>%
group_by(precinct) %>%
filter(frisked==1) %>%
summarize(totalfrisks = n())
friskorsearch_by_prec =
stop_frisk_df %>%
group_by(precinct) %>%
filter(frisked==1|searched==1) %>%
summarize(totalfriskorsearch = n())
arrests_by_prec =
stop_frisk_df %>%
group_by(precinct) %>%
filter(arst_made==1) %>%
summarize(totalarrests = n())
nypd_prec =
inner_join(nypd_prec, stops_by_prec, by="precinct") %>%
inner_join(., frisks_by_prec, by="precinct") %>%
inner_join(., arrests_by_prec, by="precinct") %>%
inner_join(., friskorsearch_by_prec, by="precinct")
pal <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalstops))
pal2 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalfrisks))
pal3 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalarrests))
pal4 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalfriskorsearch))
nypd_prec =
nypd_prec %>%
mutate(
tsc = pal(totalstops),
tfc = pal2(totalfrisks),
tac = pal3(totalarrests),
tfsc = pal4(totalfriskorsearch),
label = str_c("Precinct: ", precinct, "<br/>", "Total Stops in 2016: ", totalstops, "<br/>", "Total Frisks or Searches in 2016: ", totalfriskorsearch, "<br/>", "Total Arrests in 2016: ", totalarrests) %>% map(htmltools::HTML)
)
Maps by Precinct
# Total Stops by Precinct
nypd_prec %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(label = ~label,
color = "#000000",
fillColor = ~tsc,
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75,
highlightOptions = highlightOptions(color = "white",
weight = 2,
bringToFront = TRUE)) %>%
addLegend(pal = pal,
values = ~totalstops,
title = "Total Number of Stops",
position = "bottomright")
# Total Frisks or Searches by Precinct
nypd_prec %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(label = ~label,
color = "#000000",
fillColor = ~tfsc,
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75,
highlightOptions = highlightOptions(color = "white",
weight = 2,
bringToFront = TRUE)) %>%
addLegend(pal = pal4,
values = ~totalfriskorsearch,
title = "Total Number of Frisks or Searches",
position = "bottomright")
# Total Arrests by Precinct
nypd_prec %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(label = ~label,
color = "#000000",
fillColor = ~tac,
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75,
highlightOptions = highlightOptions(color = "white",
weight = 2,
bringToFront = TRUE)) %>%
addLegend(pal = pal3,
values = ~totalarrests,
title = "Total Number of Arrests",
position = "bottomright")
Additing Most Frequent Reason for Stops by Precinct
stop_reason_total =
stop_frisk_df %>%
select(precinct, cs_objcs:cs_lkout, cs_cloth, cs_drgtr, cs_furtv, cs_vcrim:cs_other, frisked) %>%
pivot_longer(
cs_objcs:cs_other,
names_to = "reason_stopped",
values_to = "stops"
) %>%
mutate(
reason_stopped = recode(
reason_stopped,
"cs_objcs" = "carrying suspicious object",
"cs_descr" = "fits a relevant description",
"cs_casng" = "casing a victim or location",
"cs_lkout" = "suspect acting as a lookout",
"cs_cloth" = "wearing clothes commonly used in crimes",
"cs_drgtr" = "actions indicative of drug transaction",
"cs_furtv" = "furtive movements",
"cs_vcrim" = "actions engaging in violent crime",
"cs_bulge" = "suspcious bulge",
"cs_other" = "other"
)) %>%
filter(stops == 1) %>%
group_by(reason_stopped, precinct) %>%
summarize(total =n()) %>%
ungroup() %>%
group_by(precinct) %>%
top_n(1, total) %>%
mutate(greatreasonstop = reason_stopped) %>%
select(precinct, greatreasonstop)
nypd_prec = left_join(nypd_prec, stop_reason_total, by="precinct")
nypd_prec =
nypd_prec %>%
mutate(
label = str_c("Precinct: ", precinct, "<br/>", "Total Stops in 2016: ", totalstops, "<br/>", "Total Frisks or Searches in 2016: ", totalfriskorsearch, "<br/>", "Total Arrests in 2016: ", totalarrests, "<br/>", "Most Frequent Reason for Stop: ", greatreasonstop) %>% map(htmltools::HTML)
)
Heatmap of Stops by Race
stops_by_race =
left_join(stop_frisk_map_df, stops_by_prec, by="precinct") %>%
group_by(precinct, race) %>%
summarize(stops_by_race = n()) %>%
left_join(., stops_by_prec, by="precinct") %>%
mutate(
percentstops = (stops_by_race/totalstops)*100
)
palfact <- colorFactor("viridis", domain = stops_by_race$race)
max_stops_by_race =
stops_by_race %>%
group_by(precinct) %>%
top_n(1, percentstops) %>%
mutate(maxstoppedrace = race) %>%
select(precinct, maxstoppedrace)
nypd_prec = left_join(nypd_prec, max_stops_by_race, by="precinct")
nypd_prec %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(label = ~maxstoppedrace,
color = "#000000",
fillColor = ~palfact(maxstoppedrace),
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75
) %>%
addLegend(pal = palfact,
values = ~maxstoppedrace,
title = "Race Making Up the Largest <br/>
Percentage Stops",
position = "bottomright")
Combining to Try Layering Functioning (but this can be accomplished with Shiny as well)
stop_frisk_map_df %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
group = "Individual Stops") %>%
addPolygons(data = nypd_prec,
label = ~label,
color = "#000000",
fillColor = ~tfc,
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75,
group = "Police Precincts"
) %>%
addLegend(data = nypd_prec,
pal = pal,
values = ~totalstops,
title = "Total Number of Stops",
position = "bottomright",
group = "Police Precincts"
) %>%
addLayersControl(
overlayGroups = c("Individual Stops", "Police Precincts"),
options = layersControlOptions(collapsed = FALSE)
)
Testing Additional Layers for Shiny
nypd_prec %>%
leaflet() %>%
addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(data = stop_frisk_map_df,
~longitude, ~latitude,
radius = 1,
opacity = 0.6,
label = ~label,
group = "Individual Stops") %>%
addPolygons(label = ~label,
color = "#000000",
fillColor = ~tac,
weight = 1,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.75,
group = "Police Precinct Info") %>%
addLegend(pal = pal3,
values = ~totalarrests,
title = "Total Number of Arrests",
position = "bottomright",
group = "Police Precinct Info") %>%
addPolylines(label = ~maxstoppedrace,
color = ~palfact(maxstoppedrace),
weight = 1.5,
smoothFactor = 0.5,
opacity = 1,
fillOpacity = 0.5,
highlightOptions = highlightOptions(weight = 4,
bringToFront = TRUE),
group = "Race of Largest Percentage of Stops") %>%
addLegend(pal = palfact,
values = ~maxstoppedrace,
title = "Race Making Up the Largest <br/>
Percentage Stops",
position = "bottomright",
group = "Race of Largest Percentage of Stops") %>%
addLayersControl(overlayGroups = c("Individual Stops", "Police Precinct Info", "Race of Largest Percentage of Stops"),
options = layersControlOptions(collapsed = FALSE)) %>%
hideGroup(c("Police Precinct Info", "Race of Largest Percentage of Stops"))
Row
Number of Stops by Month
renderPlot({
if (input[["analysis_type"]] == "Individual Borough") {
stop_frisk_df %>%
filter(
boro == input[["boro_choice"]]
) %>%
mutate(
month_stop = month(date_stop)
) %>%
filter(
month_stop != is_null(month_stop)
) %>%
group_by(month_stop) %>%
summarize(
count = n()
) %>%
ggplot(aes(x = month_stop, y = count)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(
x = "Month",
y = "Number of Stops"
)
} else {
stop_frisk_df %>%
mutate(
month_stop = month(date_stop)
) %>%
filter(
month_stop != is_null(month_stop)
) %>%
group_by(month_stop) %>%
summarize(
count = n()
) %>%
ggplot(aes(x = month_stop, y = count)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(
x = "Month",
y = "Number of Stops"
)
}
})
Number of Stops by Hour
renderPlot({
if (input[["analysis_type"]] == "Individual Borough") {
stop_frisk_df %>%
filter(
boro == input[["boro_choice"]]
) %>%
mutate(
hour_stop = hour(time_stop)
) %>%
group_by(hour_stop) %>%
summarize(
count = n()
) %>%
ggplot(aes(x = hour_stop, y = count)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(
x = "Hour",
y = "Number of Stops"
)
} else {
stop_frisk_df %>%
mutate(
hour_stop = hour(time_stop)
) %>%
group_by(hour_stop) %>%
summarize(
count = n()
) %>%
ggplot(aes(x = hour_stop, y = count)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(
x = "Hour",
y = "Number of Stops"
)
}
})